Take Home Exercise 3

VAST Challenge 3: Economics

Yu Di (Singapore Management University)
05-06-2022

1

2

2.1

packages = c('ggiraph', 'plotly', 'DT', 'patchwork', 'gganimate', 'tidyverse', 
             'readxl', 'gifski', 'gapminder', 'tidyverse', 'rmarkdown', 
             'ggdist', 'ggridges', 'patchwork', 'ggthemes', 'hrbrthemes','ggrepel', 
             'ggforce') 
for (p in packages){ 
  if(!require(p, character.only = T)){ 
    install.packages(p) 
  } 
  library(p,character.only = T) }
financial <- read_csv('data/FinancialJournal.csv')
paged_table(financial)
y <- as.POSIXct(financial$timestamp, format="%Y-%m-%d %H:%M:%S")
financial$year <- format(y, format="%Y")
financial$month <- format(y, format="%m")

income <- financial %>%
  filter(category %in% c('Wage', 'RentAdjustment')) %>%
  group_by(year, month) %>%
  summarise(income = mean(amount))

outcome <- financial %>%
  filter(!category %in% c('Wage', 'RentAdjustment')) %>%
  group_by(year, month) %>%
  summarise(outcome = mean(abs(amount)))

total <- merge(income, outcome, by=c('year', 'month'))
total$coef <- total$outcome / total$income
total$date <- paste(total$year, total$month, sep='-')
plot_ly(total, x = ~date, y = ~coef, type = 'scatter',mode = 'lines+markers') %>% layout(title="Trend of Living Standards", 
       xaxis = list(title = "Date"),
       yaxis = list (title = "Coefficient\n(outcome/income)"))            
total$remain <- (total$income - total$outcome)
total$remain <- round(total$remain, 1)
ggplot(data=total, aes(x=date, y=remain)) +
  geom_bar(stat = "identity", width = 0.5, fill="steelblue") +
  coord_cartesian(ylim = c(0, 160)) + 
  labs(y= 'Total Deposit', x= 'Date',
       title = "Trend of Living Standards",
       subtitle = "Highest remaining in 2022-03") +
  geom_text(aes(label = remain), vjust = -1, colour = "black") +
  theme(axis.title.y= element_text(angle=90),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        axis.ticks.x= element_blank(),
        panel.background= element_blank(), 
        axis.line= element_line(color= 'grey'), 
        panel.grid.major.y = element_line(color = "grey",size = 0.5,linetype = 2))

wage <- financial %>%
  filter(category == 'Wage') %>%
  group_by(participantId) %>%
  summarise(wage = mean(amount))
brks <- c(0, 100, 200, 300, 400, Inf)
grps <- c('<=100', '101-200', '201-300', '301-400', '>400')
wage$Wage_Group <- cut(wage$wage, breaks=brks, labels = grps, right = FALSE)

#plot_ly(wage, x = ~wage, type = "histogram")
p <- ggplot(data=wage, aes(x=wage, fill=Wage_Group)) +
    geom_histogram(aes(y = ..density..)) + 
    geom_density(fill="red", alpha = 0.2)

ggplotly(p)
income <- financial %>%
  filter(category %in% c('Wage', 'RentAdjustment')) %>%
  group_by(participantId) %>%
  summarise(income = sum(amount))

outcome <- financial %>%
  filter(!category %in% c('Wage', 'RentAdjustment')) %>%
  group_by(participantId) %>%
  summarise(outcome = sum(abs(amount)))

comparison <- merge(income, outcome, by='participantId') %>%
  merge(wage, by='participantId')
comparison$ratio <- comparison$outcome / comparison$income
ggplot(comparison, aes(x = ratio, y = Wage_Group)) +
  geom_density_ridges(calc_ecdf = TRUE,
                      quantiles = 4, 
                      quantile_lines = TRUE,
                      alpha = .2) +
  theme_ridges() + 
  scale_fill_viridis_d(name = "Quartiles")+
  ggtitle("Distribution of outcome/income Ratio in Different Wage Group")+
  theme(plot.title = element_text(size = 12), legend.position = "top")